In [1]:
import os

import matplotlib.pyplot as plt
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
import seaborn as sns
from sklearn.cluster import KMeans
# Make "plotly_white" the default template for Plotly figures created after this point.
pio.templates.default = "plotly_white"
In [2]:
# Location of the credit-scoring CSV. Set the CREDIT_SCORING_CSV environment
# variable to point at your own copy of the file; when it is unset the path
# the notebook was originally written against is used.
DATA_PATH = os.environ.get(
    "CREDIT_SCORING_CSV",
    "/Users/abelabykuriakose/downloads/credit_scoring.csv",
)
df = pd.read_csv(DATA_PATH)
In [3]:
# Summarise the schema: column names, non-null counts, dtypes and memory usage.
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 12 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   Age                        1000 non-null   int64  
 1   Gender                     1000 non-null   object 
 2   Marital Status             1000 non-null   object 
 3   Education Level            1000 non-null   object 
 4   Employment Status          1000 non-null   object 
 5   Credit Utilization Ratio   1000 non-null   float64
 6   Payment History            1000 non-null   float64
 7   Number of Credit Accounts  1000 non-null   int64  
 8   Loan Amount                1000 non-null   int64  
 9   Interest Rate              1000 non-null   float64
 10  Loan Term                  1000 non-null   int64  
 11  Type of Loan               1000 non-null   object 
dtypes: float64(3), int64(4), object(5)
memory usage: 93.9+ KB
In [4]:
# Preview the first rows to sanity-check the parsed values.
df.head()
Out[4]:
Age Gender Marital Status Education Level Employment Status Credit Utilization Ratio Payment History Number of Credit Accounts Loan Amount Interest Rate Loan Term Type of Loan
0 60 Male Married Master Employed 0.22 2685.0 2 4675000 2.65 48 Personal Loan
1 25 Male Married High School Unemployed 0.20 2371.0 9 3619000 5.19 60 Auto Loan
2 30 Female Single Master Employed 0.22 2771.0 6 957000 2.76 12 Auto Loan
3 58 Female Married PhD Unemployed 0.12 1371.0 2 4731000 6.57 60 Auto Loan
4 32 Male Married Bachelor Self-Employed 0.99 828.0 2 3289000 6.28 36 Personal Loan
In [5]:
# Box plot showing the spread of the credit utilization ratio across customers.
credit_utilization_fig = px.box(
    data_frame=df,
    y='Credit Utilization Ratio',
    title='Credit Utilization Ratio Distribution',
)
credit_utilization_fig.show()
In [6]:
# Histogram of loan amounts, with the bin count capped at 20.
loan_amount_hist_options = dict(
    x='Loan Amount',
    nbins=20,
    title='Loan Amount Distribution',
)
loan_amount_fig = px.histogram(df, **loan_amount_hist_options)
loan_amount_fig.show()
In [7]:
# Correlation between the numeric credit features.
# (seaborn / matplotlib are imported in the notebook's first cell.)
numeric_columns = [
    'Credit Utilization Ratio',
    'Payment History',
    'Number of Credit Accounts',
    'Loan Amount',
    'Interest Rate',
    'Loan Term',
]
numeric_df = df[numeric_columns]
corr = numeric_df.corr()

# Draw on an explicit Figure/Axes rather than the implicit pyplot state so the
# plot can be sized and titled, and stands alone when the notebook is skimmed.
heatmap_fig, heatmap_ax = plt.subplots(figsize=(8, 6))
fig_corr_heatmap = sns.heatmap(corr, annot=True, cmap='coolwarm', ax=heatmap_ax)
heatmap_ax.set_title('Correlation Between Numeric Credit Features')
heatmap_fig.tight_layout()

plt.show()
/Applications/anaconda3/lib/python3.8/site-packages/scipy/__init__.py:138: UserWarning:

A NumPy version >=1.16.5 and <1.23.0 is required for this version of SciPy (detected version 1.24.2)

In [ ]:
 
In [8]:
# Ordinal encodings for the categorical features that enter the score
education_level_mapping = {'High School': 1, 'Bachelor': 2, 'Master': 3, 'PhD': 4}
employment_status_mapping = {'Unemployed': 0, 'Employed': 1, 'Self-Employed': 2}

# Weight of each feature in the score. The 35/30/15/10/10 split is FICO-inspired,
# but the inputs are used on their raw, unscaled values, so this is a heuristic
# score rather than a real FICO score. 'Payment History' is in the thousands
# while the other inputs are single digits or fractions, so it dominates.
credit_score_weights = {
    'Payment History': 0.35,
    'Credit Utilization Ratio': 0.30,
    'Number of Credit Accounts': 0.15,
    'Education Level': 0.10,
    'Employment Status': 0.10,
}


def encode_ordinal(series, mapping):
    """Replace category labels with their ordinal codes.

    A column that already holds only codes from ``mapping`` is returned
    unchanged, so re-running this cell does not turn the encoded values
    into NaN (which is what a second ``.map`` call would do).
    Labels missing from ``mapping`` become NaN, as with a plain ``.map``.
    """
    if series.isin(list(mapping.values())).all():
        return series
    return series.map(mapping)


# Apply mapping to categorical features
df['Education Level'] = encode_ordinal(df['Education Level'], education_level_mapping)
df['Employment Status'] = encode_ordinal(df['Employment Status'], employment_status_mapping)

# Weighted sum of the features, computed column-wise instead of row by row.
# The terms are added in the order listed in `credit_score_weights`.
credit_scores = sum(
    df[column] * weight for column, weight in credit_score_weights.items()
)

# Add the credit scores as a new column to the DataFrame
df['Credit Score'] = credit_scores
 
In [9]:
# Show the frame with the encoded columns and the new 'Credit Score' column
# (the rendered output is a truncated view: first and last rows plus the shape).
df
Out[9]:
Age Gender Marital Status Education Level Employment Status Credit Utilization Ratio Payment History Number of Credit Accounts Loan Amount Interest Rate Loan Term Type of Loan Credit Score
0 60 Male Married 3 1 0.22 2685.0 2 4675000 2.65 48 Personal Loan 940.516
1 25 Male Married 1 0 0.20 2371.0 9 3619000 5.19 60 Auto Loan 831.360
2 30 Female Single 3 1 0.22 2771.0 6 957000 2.76 12 Auto Loan 971.216
3 58 Female Married 4 0 0.12 1371.0 2 4731000 6.57 60 Auto Loan 480.586
4 32 Male Married 2 2 0.99 828.0 2 3289000 6.28 36 Personal Loan 290.797
... ... ... ... ... ... ... ... ... ... ... ... ... ...
995 59 Male Divorced 1 1 0.74 1285.0 8 3530000 12.99 48 Auto Loan 451.372
996 64 Male Divorced 2 0 0.77 1857.0 2 1377000 18.02 60 Home Loan 650.681
997 63 Female Single 3 2 0.18 2628.0 10 2443000 18.95 12 Personal Loan 921.854
998 51 Female Married 4 2 0.32 1142.0 3 1301000 1.80 24 Auto Loan 400.846
999 37 Female Married 3 2 0.17 1028.0 5 4182000 9.34 24 Auto Loan 361.101

1000 rows × 13 columns

In [13]:
# Split customers into four segments by clustering on the credit score alone.
# (KMeans is imported in the notebook's first cell.)
# n_init and random_state are pinned so the fit is repeatable.
X = df[['Credit Score']]
kmeans = KMeans(n_clusters=4, n_init=10, random_state=42)

# fit_predict fits the model and returns the same labels as `kmeans.labels_`.
# The integer ids are arbitrary cluster identifiers: a larger id does not
# imply a larger credit score.
df['Segment'] = kmeans.fit_predict(X)
In [16]:
# Treat the cluster id as a categorical label so Plotly assigns one discrete
# colour per segment instead of a continuous colour scale
df['Segment'] = df['Segment'].astype('category')

# Scatter of credit score against row position, coloured by segment
segment_palette = ['green', 'blue', 'yellow', 'red']
fig = px.scatter(
    data_frame=df,
    x=df.index,
    y='Credit Score',
    color='Segment',
    color_discrete_sequence=segment_palette,
).update_layout(
    xaxis_title='Customer Index',
    yaxis_title='Credit Score',
    title='Customer Segmentation based on Credit Scores',
)
fig.show()
In [18]:
# Human-readable segment names, ordered from the lowest to the highest scores.
segment_names = ['Very Low', 'Low', 'Good', 'Excellent']

# K-Means cluster ids are arbitrary, so derive the id -> name mapping from the
# fitted centroids (lowest centroid -> 'Very Low', ..., highest -> 'Excellent')
# instead of hard-coding which id happened to land on which score band.
cluster_ids_by_score = kmeans.cluster_centers_.ravel().argsort()
segment_name_by_cluster = {
    int(cluster_id): name
    for cluster_id, name in zip(cluster_ids_by_score, segment_names)
}

# Rebuild the column from the fitted labels rather than from df['Segment'], so
# re-running this cell does not map the already-named segments to NaN.
df['Segment'] = kmeans.labels_
df['Segment'] = df['Segment'].map(segment_name_by_cluster)

# Convert the 'Segment' column to category data type
df['Segment'] = df['Segment'].astype('category')

# Visualize the segments using Plotly
fig = px.scatter(df, x=df.index, y='Credit Score', color='Segment',
                 color_discrete_sequence=['green', 'blue', 'yellow', 'red'])
fig.update_layout(
    xaxis_title='Customer Index',
    yaxis_title='Credit Score',
    title='Customer Segmentation based on Credit Scores'
)
fig.show()
In [ ]: